library(tidyverse)
library(edgeR)
library(AnnotationHub)
library(magrittr)
library(scales)
library(GenomicRanges)
library(cqn)
library(fgsea)
library(kableExtra)
if (interactive()) setwd(here::here("R"))
theme_set(theme_bw())
nCores <- min(12, detectCores() - 1)
# # Get GC content and lengths
# gcTrans <- url("https://uofabioinformaticshub.github.io/Ensembl_GC/Release94/Danio_rerio.GRCz11.94.rds") %>%
# readRDS()
# # Convert to the gene-level with average length, average GC, max length, and GC of longest transcript for all transcripts assigned to a single gene.
# gcGene <- gcTrans %>%
# split(f = .$gene_id) %>%
# mclapply(function(x){
# gr <- reduce(x)
# df <- DataFrame(
# gene_id = unique(x$gene_id),
# gene_symbol = unique(x$gene_symbol),
# aveLength = round(mean(x$length),0) %>% as.integer(),
# aveGc = sum(x$length * x$gc) / sum(x$length),
# maxLength = max(x$length),
# longestGc = x$gc[which.max(x$length)[[1]]]
# )
# mcols(gr) <- df
# gr
# }, mc.cores = 4) %>%
# GRangesList() %>%
# unlist() %>%
# na.omit()
# # This takes a reasonable amount of time
# # Save as .rds file for faster loading
# saveRDS("../files/gcGene.rds")
gcGene <- readRDS("../files/gcGene.rds")
# Load id conversion file
idConvert <- read_csv2("../files/zf2human_withEntrezIDs.csv") %>%
dplyr::select(Geneid = zfID, EntrezID = Entrez) %>%
mutate(EntrezID = as.character(EntrezID))
# Create function to convert ids
convertHsEG2Dr <- function(ids, df = idConvert){
dplyr::filter(df, EntrezID %in% ids)$Geneid
}
# Conversion of zebrafish ensembl ID to zebrafish symbol, for plotting on network analyses
idConvertSymbol <- read_csv2("../files/zf2human_withEntrezIDs.csv") %>%
dplyr::select(label = zfID, symbol = zfName) %>%
na.omit() %>%
unique()
# Import hallmark human gene genesets and tidy the gene set names
# .gmt file downloaded from:
# http://software.broadinstitute.org/gsea/downloads.jsp
hallmark <- gmtPathways("../files/h.all.v6.2.entrez.gmt") %>%
mclapply(convertHsEG2Dr, mc.cores = 4) %>%
set_names(str_remove_all(names(.), "HALLMARK_"))
# Load counts analysed by feature counts
counts_19 <- read_tsv("../2_alignedData/featureCounts/genes.out") %>%
set_colnames(basename(colnames(.))) %>%
set_colnames(str_remove(colnames(.), "Aligned.sortedByCoord.out.bam")) %>%
dplyr::select(Geneid, starts_with("W"), starts_with("Q"))
# Create DGEList and calculate normalisaton factors
dgeList_19 <- counts_19 %>%
as.data.frame() %>%
column_to_rownames("Geneid") %>%
DGEList() %>%
calcNormFactors()
# Set group variable
dgeList_19$samples$group <- colnames(dgeList_19) %>%
str_extract("(W|Q)") %>%
factor(levels = c("W", "Q"))
# Add AnnotationHub and subset to search for zebrafish
ah <- AnnotationHub()
ah %>%
subset(species == "Danio rerio") %>%
subset(dataprovider == "Ensembl") %>%
subset(rdataclass == "EnsDb")
## AnnotationHub with 12 records
## # snapshotDate(): 2019-05-02
## # $dataprovider: Ensembl
## # $species: Danio rerio
## # $rdataclass: EnsDb
## # additional mcols(): taxonomyid, genome, description,
## # coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
## # rdatapath, sourceurl, sourcetype
## # retrieve records with, e.g., 'object[["AH53201"]]'
##
## title
## AH53201 | Ensembl 87 EnsDb for Danio Rerio
## AH53705 | Ensembl 88 EnsDb for Danio Rerio
## AH56671 | Ensembl 89 EnsDb for Danio Rerio
## AH57746 | Ensembl 90 EnsDb for Danio Rerio
## AH60762 | Ensembl 91 EnsDb for Danio Rerio
## ... ...
## AH64906 | Ensembl 94 EnsDb for Danio rerio
## AH67932 | Ensembl 95 EnsDb for Danio rerio
## AH69169 | Ensembl 96 EnsDb for Danio rerio
## AH73861 | Ensembl 97 EnsDb for Danio rerio
## AH74989 | Ensembl 98 EnsDb for Danio rerio
# Select correct Ensembl release
ensDb <- ah[["AH64906"]]
# Extract GenomicRanges object from ensDb
genesGR <- genes(ensDb)
# Remove redundant columns from mcols
mcols(genesGR) <- mcols(genesGR)[c("gene_id", "gene_name",
"gene_biotype", "entrezid")] %>%
as.data.frame() %>%
dplyr::select(-entrezid) %>%
left_join(as.data.frame(mcols(gcGene))) %>%
distinct(gene_id, .keep_all = TRUE) %>%
set_rownames(.$gene_id) %>%
DataFrame() %>%
.[names(genesGR),]
# Add genesGR to DGEList using rownames of DGEList to reorder the genesGR
dgeList_19$genes <- genesGR[rownames(dgeList_19),]
# Create logical vector of genes to keep that fit criteria
genes2keep_19 <- dgeList_19 %>%
cpm() %>%
is_greater_than(1) %>%
rowSums() %>%
is_weakly_greater_than(4)
# Create new DGEList of genes fitting criteria
dgeFilt_19 <- dgeList_19[genes2keep_19,, keep.lib.sizes = FALSE] %>%
calcNormFactors()
# Create model matrix
design_19 <- model.matrix(~group, data = dgeFilt_19$samples)
# Perform DE test
topTable_19 <- estimateGLMCommonDisp(dgeFilt_19) %>%
glmFit(design_19) %>%
glmLRT(coef=2) %>%
topTags(n = Inf) %>%
.$table %>%
as_tibble() %>%
unite("Range", ID.start, ID.end, sep = "-") %>%
unite("Location", ID.seqnames, Range, ID.strand, sep = ":") %>%
set_colnames(str_remove(colnames(.), "ID.")) %>%
dplyr::select(
Geneid = gene_id,
Symbol = gene_name,
AveExpr = logCPM,
logFC,
P.Value = PValue,
FDR,
Location,
aveLength,
aveGc,
maxLength,
longestGc
) %>%
mutate(
DE = FDR < 0.05,
RankStat = -sign(logFC)*log10(P.Value)
)
if (!interactive()) {
topTable_19 %>%
dplyr::select(Geneid, Symbol, AveExpr, logFC, P.Value, FDR) %>%
dplyr::slice(1:20) %>%
kable(caption = "The 20 most differentially expressed genes") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
}
| Geneid | Symbol | AveExpr | logFC | P.Value | FDR |
|---|---|---|---|---|---|
| ENSDARG00000091368 | AL954327.1 | 2.656650 | -5.906385 | 0 | 0 |
| ENSDARG00000111860 | exorh | 3.993233 | 3.405441 | 0 | 0 |
| ENSDARG00000009637 | rcvrn3 | 2.423242 | 3.635172 | 0 | 0 |
| ENSDARG00000111827 | rcvrnb | 3.200713 | 3.212106 | 0 | 0 |
| ENSDARG00000059163 | rbp3 | 2.234735 | 3.296390 | 0 | 0 |
| ENSDARG00000098249 | asmt | 2.165764 | 3.257285 | 0 | 0 |
| ENSDARG00000002768 | pvalb2 | 2.238580 | -3.378321 | 0 | 0 |
| ENSDARG00000093753 | BX004774.2 | 2.161002 | 2.809252 | 0 | 0 |
| ENSDARG00000067990 | myhz1.1 | 2.622203 | -2.876345 | 0 | 0 |
| ENSDARG00000053254 | mylpfa | 2.393426 | -2.878776 | 0 | 0 |
| ENSDARG00000040565 | ckmb | 2.632848 | -2.787234 | 0 | 0 |
| ENSDARG00000024877 | ptgr1 | 4.624935 | 2.330074 | 0 | 0 |
| ENSDARG00000035327 | ckma | 2.350054 | -2.799236 | 0 | 0 |
| ENSDARG00000075963 | mhc1uba | 5.918828 | 2.189626 | 0 | 0 |
| ENSDARG00000099197 | actc1b | 3.642893 | -2.441845 | 0 | 0 |
| ENSDARG00000017624 | krt4 | 3.777492 | -2.393940 | 0 | 0 |
| ENSDARG00000017246 | prx | 2.325670 | -2.619766 | 0 | 0 |
| ENSDARG00000024789 | mxc | 1.817686 | 2.471397 | 0 | 0 |
| ENSDARG00000100397 | pde6c | 1.332726 | 2.436494 | 0 | 0 |
| ENSDARG00000035438 | myhc4 | 1.653235 | -2.433943 | 0 | 0 |
topTable_19 %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content",
title = "2019 data without cqn"
) +
scale_color_manual(values = c("grey70", "red"))
Comparison of logFC and GC content before using cqn
topTable_19 %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, RankStat)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content",
y = "Ranking Statistic",
title = "2019 data without cqn"
) +
scale_color_manual(values = c("grey70", "red"))
Comparison of ranking statistics and GC content before using cqn
topTable_19 %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene length",
title = "2019 data without cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
topTable_19 %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, RankStat)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene Length",
y = "Ranking Statistic",
title = "2019 data without cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
# Create named vector of gene level statistics
ranks_19 <- topTable_19 %>%
dplyr::arrange(RankStat) %>%
with(structure(RankStat, names = Geneid))
# Run GSEA
fgsea_19 <- fgsea(hallmark, ranks_19, nperm=1e5) %>%
as_tibble() %>%
dplyr::rename(FDR = padj) %>%
mutate(bonferroni = p.adjust(pval, "bonferroni")) %>%
dplyr::arrange(pval)
if (!interactive()) {
fgsea_19 %>%
dplyr::select(-leadingEdge) %>%
dplyr::slice(1:10) %>%
kable(caption = "The 10 most significantly enriched pathways") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
}
| pathway | pval | FDR | ES | NES | nMoreExtreme | size | bonferroni |
|---|---|---|---|---|---|---|---|
| INTERFERON_GAMMA_RESPONSE | 0.0000196 | 0.0004895 | 0.7628697 | 2.247436 | 0 | 200 | 0.0009786 |
| ALLOGRAFT_REJECTION | 0.0000196 | 0.0004895 | 0.7663245 | 2.251931 | 0 | 195 | 0.0009790 |
| MYOGENESIS | 0.0000615 | 0.0010255 | -0.7025634 | -2.160277 | 2 | 216 | 0.0030765 |
| INTERFERON_ALPHA_RESPONSE | 0.0038963 | 0.0487036 | 0.7494870 | 2.029541 | 193 | 83 | 0.1948143 |
| INFLAMMATORY_RESPONSE | 0.0049064 | 0.0490640 | 0.6412325 | 1.860664 | 248 | 173 | 0.2453202 |
| OXIDATIVE_PHOSPHORYLATION | 0.0200012 | 0.1656755 | 0.5569524 | 1.654355 | 1024 | 219 | 1.0000000 |
| KRAS_SIGNALING_DN | 0.0231946 | 0.1656755 | -0.5742175 | -1.710094 | 1144 | 155 | 1.0000000 |
| COMPLEMENT | 0.0270830 | 0.1692691 | 0.5554376 | 1.638361 | 1384 | 202 | 1.0000000 |
| ESTROGEN_RESPONSE_EARLY | 0.0365667 | 0.1985954 | -0.5167204 | -1.573585 | 1787 | 196 | 1.0000000 |
| MYC_TARGETS_V1 | 0.0397191 | 0.1985954 | 0.5272316 | 1.566617 | 2035 | 220 | 1.0000000 |
# Check which genes in dgeFilt have GC content and length in gcGene
genes2keep_19cqn <- rownames(dgeFilt_19) %in% mcols(gcGene)$gene_id
dgeFilt_19cqn <- dgeFilt_19[genes2keep_19cqn,, keep.lib.sizes = FALSE]
cqn_19 <- cqn(
dgeFilt_19cqn$counts,
x = mcols(dgeFilt_19cqn$genes)$aveGc,
lengths = mcols(dgeFilt_19cqn$genes)$aveLength,
sizeFactors = dgeFilt_19cqn$samples$lib.size
)
dgeFilt_19cqn$offset <- cqn_19$glm.offset
design_19cqn <- model.matrix(~ group, data = dgeFilt_19cqn$samples)
topTable_19cqn <- estimateGLMCommonDisp(dgeFilt_19cqn) %>%
glmFit(design_19cqn) %>%
glmLRT(coef=2) %>%
topTags(n = Inf) %>%
.$table %>%
as_tibble() %>%
unite("Range", ID.start, ID.end, sep = "-") %>%
unite("Location", ID.seqnames, Range, ID.strand, sep = ":") %>%
set_colnames(str_remove(colnames(.), "ID.")) %>%
dplyr::select(
Geneid = gene_id,
Symbol = gene_name,
AveExpr = logCPM,
logFC,
P.Value = PValue,
FDR,
Location,
aveLength,
aveGc,
maxLength,
longestGc
) %>%
mutate(
DE = FDR < 0.05,
RankStat = -sign(logFC)*log10(P.Value)
)
if (!interactive()) {
topTable_19cqn %>%
dplyr::select(Geneid, Symbol, AveExpr, logFC, P.Value, FDR) %>%
dplyr::slice(1:20) %>%
kable(caption = "The 20 most differentially expressed genes") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
}
| Geneid | Symbol | AveExpr | logFC | P.Value | FDR |
|---|---|---|---|---|---|
| ENSDARG00000111860 | exorh | 4.022712 | 3.436489 | 0 | 0 |
| ENSDARG00000009637 | rcvrn3 | 2.455907 | 3.559393 | 0 | 0 |
| ENSDARG00000111827 | rcvrnb | 3.231432 | 3.144728 | 0 | 0 |
| ENSDARG00000098249 | asmt | 2.198925 | 3.332647 | 0 | 0 |
| ENSDARG00000059163 | rbp3 | 2.267514 | 3.300725 | 0 | 0 |
| ENSDARG00000002768 | pvalb2 | 2.263883 | -3.393807 | 0 | 0 |
| ENSDARG00000024877 | ptgr1 | 4.651772 | 2.231027 | 0 | 0 |
| ENSDARG00000053254 | mylpfa | 2.418812 | -2.792908 | 0 | 0 |
| ENSDARG00000075963 | mhc1uba | 5.946182 | 2.151564 | 0 | 0 |
| ENSDARG00000040565 | ckmb | 2.658295 | -2.675660 | 0 | 0 |
| ENSDARG00000035327 | ckma | 2.375427 | -2.679857 | 0 | 0 |
| ENSDARG00000067990 | myhz1.1 | 2.647735 | -2.601809 | 0 | 0 |
| ENSDARG00000099197 | actc1b | 3.668376 | -2.325107 | 0 | 0 |
| ENSDARG00000017624 | krt4 | 3.802965 | -2.288732 | 0 | 0 |
| ENSDARG00000024789 | mxc | 1.847155 | 2.478310 | 0 | 0 |
| ENSDARG00000017246 | prx | 2.350904 | -2.400416 | 0 | 0 |
| ENSDARG00000100397 | pde6c | 1.366527 | 2.470467 | 0 | 0 |
| ENSDARG00000074345 | si:ch211-214b16.4 | 3.466160 | 1.917570 | 0 | 0 |
| ENSDARG00000068457 | tnnt3b | 1.919036 | -2.226481 | 0 | 0 |
| ENSDARG00000010729 | CABZ01073795.1 | 3.744797 | 1.772184 | 0 | 0 |
# Check similarity of genes
topTable_19cqn$Geneid[topTable_19cqn$DE == TRUE] %in%
topTable_19$Geneid[topTable_19$DE == TRUE] %>%
table()
## .
## FALSE TRUE
## 54 270
topTable_19cqn %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content",
title = "2019 data with cqn"
) +
scale_color_manual(values = c("grey70", "red"))
topTable_19cqn %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, RankStat)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content",
y = "Ranking Statistic",
title = "2019 data with cqn"
) +
scale_color_manual(values = c("grey70", "red"))
topTable_19cqn %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene length",
title = "2019 data with cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
topTable_19cqn %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, RankStat)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene Length",
y = "Ranking Statistic",
title = "2019 data with cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
# Create named vector of gene level statistics
ranks_19cqn <- topTable_19cqn %>%
dplyr::arrange(RankStat) %>%
with(structure(RankStat, names = Geneid))
# Run GSEA
fgsea_19cqn <- fgsea(hallmark, ranks_19cqn, nperm=1e5) %>%
as_tibble() %>%
dplyr::rename(FDR = padj) %>%
mutate(padj = p.adjust(pval, "bonferroni")) %>%
dplyr::arrange(pval)
if (!interactive()) {
fgsea_19cqn %>%
dplyr::select(-leadingEdge) %>%
dplyr::slice(1:10) %>%
kable(caption = "The 10 most significantly enriched pathways") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
}
| pathway | pval | FDR | ES | NES | nMoreExtreme | size | padj |
|---|---|---|---|---|---|---|---|
| INTERFERON_GAMMA_RESPONSE | 0.0000172 | 0.0006036 | 0.7892896 | 2.216671 | 0 | 200 | 0.0008585 |
| MYOGENESIS | 0.0000241 | 0.0006036 | -0.7137620 | -2.163269 | 0 | 216 | 0.0012072 |
| ALLOGRAFT_REJECTION | 0.0000860 | 0.0014330 | 0.7612808 | 2.134019 | 4 | 195 | 0.0042989 |
| INTERFERON_ALPHA_RESPONSE | 0.0015559 | 0.0194486 | 0.8074698 | 2.098253 | 85 | 83 | 0.0777943 |
| INFLAMMATORY_RESPONSE | 0.0095299 | 0.0952993 | 0.6522145 | 1.808917 | 548 | 173 | 0.4764963 |
| ESTROGEN_RESPONSE_EARLY | 0.0177184 | 0.1476531 | -0.5377855 | -1.612168 | 740 | 196 | 0.8859186 |
| KRAS_SIGNALING_DN | 0.0241824 | 0.1727315 | -0.5548583 | -1.627622 | 1032 | 155 | 1.0000000 |
| COMPLEMENT | 0.0336277 | 0.2101732 | 0.5816063 | 1.634663 | 1960 | 202 | 1.0000000 |
| HEME_METABOLISM | 0.0514493 | 0.2578441 | -0.4914861 | -1.472667 | 2152 | 195 | 1.0000000 |
| ESTROGEN_RESPONSE_LATE | 0.0515688 | 0.2578441 | -0.4914098 | -1.472438 | 2157 | 195 | 1.0000000 |
| Geneid | Symbol | AveExpr | logFC | P.Value | FDR |
|---|---|---|---|---|---|
| ENSDARG00000091368 | AL954327.1 | 2.656650 | -5.906385 | 0 | 0 |
| ENSDARG00000111860 | exorh | 3.993233 | 3.405441 | 0 | 0 |
| ENSDARG00000009637 | rcvrn3 | 2.423242 | 3.635172 | 0 | 0 |
| ENSDARG00000111827 | rcvrnb | 3.200713 | 3.212106 | 0 | 0 |
| ENSDARG00000059163 | rbp3 | 2.234735 | 3.296390 | 0 | 0 |
| ENSDARG00000098249 | asmt | 2.165764 | 3.257285 | 0 | 0 |
| ENSDARG00000002768 | pvalb2 | 2.238580 | -3.378321 | 0 | 0 |
| ENSDARG00000093753 | BX004774.2 | 2.161002 | 2.809252 | 0 | 0 |
| ENSDARG00000067990 | myhz1.1 | 2.622203 | -2.876345 | 0 | 0 |
| ENSDARG00000053254 | mylpfa | 2.393426 | -2.878776 | 0 | 0 |
| ENSDARG00000040565 | ckmb | 2.632848 | -2.787234 | 0 | 0 |
| ENSDARG00000024877 | ptgr1 | 4.624935 | 2.330074 | 0 | 0 |
| ENSDARG00000035327 | ckma | 2.350054 | -2.799236 | 0 | 0 |
| ENSDARG00000075963 | mhc1uba | 5.918828 | 2.189626 | 0 | 0 |
| ENSDARG00000099197 | actc1b | 3.642893 | -2.441845 | 0 | 0 |
| ENSDARG00000017624 | krt4 | 3.777492 | -2.393940 | 0 | 0 |
| ENSDARG00000017246 | prx | 2.325670 | -2.619766 | 0 | 0 |
| ENSDARG00000024789 | mxc | 1.817686 | 2.471397 | 0 | 0 |
| ENSDARG00000100397 | pde6c | 1.332726 | 2.436494 | 0 | 0 |
| ENSDARG00000035438 | myhc4 | 1.653235 | -2.433943 | 0 | 0 |
| Geneid | Symbol | AveExpr | logFC | P.Value | FDR |
|---|---|---|---|---|---|
| ENSDARG00000111860 | exorh | 4.022712 | 3.436489 | 0 | 0 |
| ENSDARG00000009637 | rcvrn3 | 2.455907 | 3.559393 | 0 | 0 |
| ENSDARG00000111827 | rcvrnb | 3.231432 | 3.144728 | 0 | 0 |
| ENSDARG00000098249 | asmt | 2.198925 | 3.332647 | 0 | 0 |
| ENSDARG00000059163 | rbp3 | 2.267514 | 3.300725 | 0 | 0 |
| ENSDARG00000002768 | pvalb2 | 2.263883 | -3.393807 | 0 | 0 |
| ENSDARG00000024877 | ptgr1 | 4.651772 | 2.231027 | 0 | 0 |
| ENSDARG00000053254 | mylpfa | 2.418812 | -2.792908 | 0 | 0 |
| ENSDARG00000075963 | mhc1uba | 5.946182 | 2.151564 | 0 | 0 |
| ENSDARG00000040565 | ckmb | 2.658295 | -2.675660 | 0 | 0 |
| ENSDARG00000035327 | ckma | 2.375427 | -2.679857 | 0 | 0 |
| ENSDARG00000067990 | myhz1.1 | 2.647735 | -2.601809 | 0 | 0 |
| ENSDARG00000099197 | actc1b | 3.668376 | -2.325107 | 0 | 0 |
| ENSDARG00000017624 | krt4 | 3.802965 | -2.288732 | 0 | 0 |
| ENSDARG00000024789 | mxc | 1.847155 | 2.478310 | 0 | 0 |
| ENSDARG00000017246 | prx | 2.350904 | -2.400416 | 0 | 0 |
| ENSDARG00000100397 | pde6c | 1.366527 | 2.470467 | 0 | 0 |
| ENSDARG00000074345 | si:ch211-214b16.4 | 3.466160 | 1.917570 | 0 | 0 |
| ENSDARG00000068457 | tnnt3b | 1.919036 | -2.226481 | 0 | 0 |
| ENSDARG00000010729 | CABZ01073795.1 | 3.744797 | 1.772184 | 0 | 0 |
| pathway | pval | FDR | ES | NES | nMoreExtreme | size | bonferroni |
|---|---|---|---|---|---|---|---|
| INTERFERON_GAMMA_RESPONSE | 0.0000196 | 0.0004895 | 0.7628697 | 2.247436 | 0 | 200 | 0.0009786 |
| ALLOGRAFT_REJECTION | 0.0000196 | 0.0004895 | 0.7663245 | 2.251931 | 0 | 195 | 0.0009790 |
| MYOGENESIS | 0.0000615 | 0.0010255 | -0.7025634 | -2.160277 | 2 | 216 | 0.0030765 |
| INTERFERON_ALPHA_RESPONSE | 0.0038963 | 0.0487036 | 0.7494870 | 2.029541 | 193 | 83 | 0.1948143 |
| INFLAMMATORY_RESPONSE | 0.0049064 | 0.0490640 | 0.6412325 | 1.860664 | 248 | 173 | 0.2453202 |
| OXIDATIVE_PHOSPHORYLATION | 0.0200012 | 0.1656755 | 0.5569524 | 1.654355 | 1024 | 219 | 1.0000000 |
| KRAS_SIGNALING_DN | 0.0231946 | 0.1656755 | -0.5742175 | -1.710094 | 1144 | 155 | 1.0000000 |
| COMPLEMENT | 0.0270830 | 0.1692691 | 0.5554376 | 1.638361 | 1384 | 202 | 1.0000000 |
| ESTROGEN_RESPONSE_EARLY | 0.0365667 | 0.1985954 | -0.5167204 | -1.573585 | 1787 | 196 | 1.0000000 |
| MYC_TARGETS_V1 | 0.0397191 | 0.1985954 | 0.5272316 | 1.566617 | 2035 | 220 | 1.0000000 |
| pathway | pval | FDR | ES | NES | nMoreExtreme | size | padj |
|---|---|---|---|---|---|---|---|
| INTERFERON_GAMMA_RESPONSE | 0.0000172 | 0.0006036 | 0.7892896 | 2.216671 | 0 | 200 | 0.0008585 |
| MYOGENESIS | 0.0000241 | 0.0006036 | -0.7137620 | -2.163269 | 0 | 216 | 0.0012072 |
| ALLOGRAFT_REJECTION | 0.0000860 | 0.0014330 | 0.7612808 | 2.134019 | 4 | 195 | 0.0042989 |
| INTERFERON_ALPHA_RESPONSE | 0.0015559 | 0.0194486 | 0.8074698 | 2.098253 | 85 | 83 | 0.0777943 |
| INFLAMMATORY_RESPONSE | 0.0095299 | 0.0952993 | 0.6522145 | 1.808917 | 548 | 173 | 0.4764963 |
| ESTROGEN_RESPONSE_EARLY | 0.0177184 | 0.1476531 | -0.5377855 | -1.612168 | 740 | 196 | 0.8859186 |
| KRAS_SIGNALING_DN | 0.0241824 | 0.1727315 | -0.5548583 | -1.627622 | 1032 | 155 | 1.0000000 |
| COMPLEMENT | 0.0336277 | 0.2101732 | 0.5816063 | 1.634663 | 1960 | 202 | 1.0000000 |
| HEME_METABOLISM | 0.0514493 | 0.2578441 | -0.4914861 | -1.472667 | 2152 | 195 | 1.0000000 |
| ESTROGEN_RESPONSE_LATE | 0.0515688 | 0.2578441 | -0.4914098 | -1.472438 | 2157 | 195 | 1.0000000 |
# Create object for converting transcripts to genes
transGR <- transcripts(ensDb)
mcols(transGR) <- mcols(transGR)[c("tx_id", "gene_id")]
trans2Gene <- tibble(
tx_id = transGR$tx_id,
gene_id = transGR$gene_id
)
# Load counts from 2017 data obtained as a .rds from Nhi
counts_17 <- read_rds("nhiData/6month_Normoxia.rds")
# Create DGEList
dgeList_17 <- counts_17$counts %>%
as.data.frame() %>%
rownames_to_column("tx_id") %>%
as_tibble() %>%
set_colnames(basename(colnames(.))) %>%
mutate(tx_id = str_remove_all(tx_id, "\\.[0-9]*$")) %>%
left_join(trans2Gene) %>%
group_by(gene_id) %>%
summarise_if(
.predicate = is.numeric,
.funs = sum
) %>%
as.data.frame() %>%
column_to_rownames("gene_id") %>%
round() %>%
set_colnames(str_remove(colnames(.), "[0-9]_MORGAN_6")) %>%
set_colnames(str_remove(colnames(.), "PN")) %>%
set_colnames(str_remove(colnames(.), "_[RS].+")) %>%
set_colnames(str_replace(colnames(.), "P", "W")) %>%
DGEList(
group = colnames(.) %>%
str_replace_all("([WQ])(.+)", "\\1") %>%
str_replace_all("W", "WT") %>%
str_replace_all("Q", "Mut") %>%
factor(levels = c("WT", "Mut")),
genes = genesGR[rownames(.)]
) %>%
calcNormFactors()
# Create logical vector of genes to keep that fit criteria
genes2keep_17 <- dgeList_17 %>%
cpm() %>%
is_greater_than(1) %>%
rowSums() %>%
is_weakly_greater_than(4)
# Create new DGEList of genes fitting criteria
dgeFilt_17 <- dgeList_17[genes2keep_17,, keep.lib.sizes = FALSE] %>%
calcNormFactors()
# Create model matrix
design_17 <- model.matrix(~group, data = dgeFilt_17$samples)
# Perform DE test
topTable_17 <- estimateGLMCommonDisp(dgeFilt_17) %>%
glmFit(design_17) %>%
glmLRT(coef=2) %>%
topTags(n = Inf) %>%
.$table %>%
as_tibble() %>%
unite("Range", start, end, sep = "-") %>%
unite("Location", seqnames, Range, strand, sep = ":") %>%
set_colnames(str_remove(colnames(.), "ID.")) %>%
dplyr::select(
Geneid = gene_id,
Symbol = gene_name,
AveExpr = logCPM,
logFC,
P.Value = PValue,
FDR,
Location,
aveLength,
aveGc,
maxLength,
longestGc
) %>%
mutate(
DE = FDR < 0.05,
RankStat = -sign(logFC)*log10(P.Value)
)
if (!interactive()) {
topTable_17 %>%
dplyr::select(Geneid, Symbol, AveExpr, logFC, P.Value, FDR) %>%
dplyr::slice(1:20) %>%
kable(caption = "The 20 most differentially expressed genes") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
}
| Geneid | Symbol | AveExpr | logFC | P.Value | FDR |
|---|---|---|---|---|---|
| ENSDARG00000080675 | si:dkey-71b5.7 | 3.0881824 | -3.097472 | 0 | 0 |
| ENSDARG00000058371 | krt5 | 4.6741362 | 2.449728 | 0 | 0 |
| ENSDARG00000112702 | BX537263.3 | 7.3996536 | -2.243755 | 0 | 0 |
| ENSDARG00000061585 | cyp4v7 | 2.3191654 | -3.139064 | 0 | 0 |
| ENSDARG00000087290 | si:ch211-202h22.10 | 0.6296277 | 5.591881 | 0 | 0 |
| ENSDARG00000036830 | krt91 | 3.9384441 | 2.120057 | 0 | 0 |
| ENSDARG00000074653 | si:ch211-233m11.1 | 2.6910751 | -2.277264 | 0 | 0 |
| ENSDARG00000093531 | slc37a4b | 0.4139975 | 3.306116 | 0 | 0 |
| ENSDARG00000031952 | mb | 2.5216783 | 1.889320 | 0 | 0 |
| ENSDARG00000036773 | s100s | 2.1504571 | 1.812137 | 0 | 0 |
| ENSDARG00000053480 | aqp9b | 1.7942745 | 1.755679 | 0 | 0 |
| ENSDARG00000096564 | CU915827.1 | 1.2875096 | 1.737070 | 0 | 0 |
| ENSDARG00000045367 | tuba1b | 6.6404569 | -1.126187 | 0 | 0 |
| ENSDARG00000091734 | traf3ip2l | 3.0840975 | -1.308960 | 0 | 0 |
| ENSDARG00000069093 | col2a1a | 2.6124607 | 1.350510 | 0 | 0 |
| ENSDARG00000020581 | otofb | 2.0502321 | 1.453107 | 0 | 0 |
| ENSDARG00000095578 | si:dkey-222h21.7 | 0.4110680 | 2.109993 | 0 | 0 |
| ENSDARG00000092604 | si:ch211-15j1.4 | 4.7446966 | 1.067221 | 0 | 0 |
| ENSDARG00000101598 | si:ch211-215p11.1 | 2.4986432 | 1.257070 | 0 | 0 |
| ENSDARG00000096273 | si:dkey-3n22.9 | 1.0162751 | 1.601951 | 0 | 0 |
topTable_17 %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content",
title = "2017 data without cqn"
) +
scale_color_manual(values = c("grey70", "red"))
Comparison of logFC and GC content before using cqn
topTable_17 %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, RankStat)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content",
y = "Ranking Statistic",
title = "2017 data without cqn"
) +
scale_color_manual(values = c("grey70", "red"))
Comparison of ranking statistics and GC content before using cqn
topTable_17 %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene length",
title = "2017 data without cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
Comparison of logFC and GC content before using cqn
topTable_17 %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, RankStat)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene Length",
y = "Ranking Statistic",
title = "2017 data without cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
Comparison of ranking statistics and GC content before using cqn
# Create named vector of gene level statistics
ranks_17 <- topTable_17 %>%
dplyr::arrange(RankStat) %>%
with(structure(RankStat, names = Geneid))
# Run GSEA
fgsea_17 <- fgsea(hallmark, ranks_17, nperm=1e5) %>%
as_tibble() %>%
dplyr::rename(FDR = padj) %>%
mutate(bonferroni = p.adjust(pval, "bonferroni")) %>%
dplyr::arrange(pval)
if (!interactive()) {
fgsea_17 %>%
dplyr::select(-leadingEdge) %>%
dplyr::slice(1:10) %>%
kable(caption = "The 10 most significantly enriched pathways") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
}
| pathway | pval | FDR | ES | NES | nMoreExtreme | size | bonferroni |
|---|---|---|---|---|---|---|---|
| ANGIOGENESIS | 0.0130782 | 0.3817766 | 0.7426339 | 1.885557 | 718 | 33 | 0.6539098 |
| MTORC1_SIGNALING | 0.0290776 | 0.3817766 | -0.4814323 | -1.643264 | 1033 | 229 | 1.0000000 |
| CHOLESTEROL_HOMEOSTASIS | 0.0359405 | 0.3817766 | -0.5582494 | -1.717796 | 1497 | 81 | 1.0000000 |
| MYOGENESIS | 0.0377238 | 0.3817766 | 0.4628162 | 1.504056 | 2426 | 225 | 1.0000000 |
| XENOBIOTIC_METABOLISM | 0.0456851 | 0.3817766 | -0.4664074 | -1.588547 | 1629 | 223 | 1.0000000 |
| EPITHELIAL_MESENCHYMAL_TRANSITION | 0.0458132 | 0.3817766 | 0.4525985 | 1.467107 | 2937 | 219 | 1.0000000 |
| COMPLEMENT | 0.0641052 | 0.4578941 | 0.4321599 | 1.396499 | 4095 | 213 | 1.0000000 |
| P53_PATHWAY | 0.0762101 | 0.4763131 | 0.4187876 | 1.356375 | 4883 | 217 | 1.0000000 |
| OXIDATIVE_PHOSPHORYLATION | 0.0909599 | 0.5053330 | -0.4158784 | -1.415911 | 3251 | 222 | 1.0000000 |
| MYC_TARGETS_V1 | 0.1304068 | 0.5644425 | -0.3686211 | -1.254655 | 4666 | 221 | 1.0000000 |
# Check which genes in dgeFilt have GC content and length in gcGene
genes2keep_17cqn <- rownames(dgeFilt_17) %in% mcols(gcGene)$gene_id
dgeFilt_17cqn <- dgeFilt_17[genes2keep_17cqn,, keep.lib.sizes = FALSE]
cqn_17 <- cqn(
dgeFilt_17cqn$counts,
x = dgeFilt_17cqn$genes$aveGc,
lengths = dgeFilt_17cqn$genes$aveLength,
sizeFactors = dgeFilt_17cqn$samples$lib.size
)
dgeFilt_17cqn$offset <- cqn_17$glm.offset
design_17cqn <- model.matrix(~ group, data = dgeFilt_17cqn$samples)
topTable_17cqn <- estimateGLMCommonDisp(dgeFilt_17cqn) %>%
glmFit(design_17cqn) %>%
glmLRT(coef=2) %>%
topTags(n = Inf) %>%
.$table %>%
as_tibble() %>%
unite("Range", start, end, sep = "-") %>%
unite("Location", seqnames, Range, strand, sep = ":") %>%
set_colnames(str_remove(colnames(.), "ID.")) %>%
dplyr::select(
Geneid = gene_id,
Symbol = gene_name,
AveExpr = logCPM,
logFC,
P.Value = PValue,
FDR,
Location,
aveLength,
aveGc,
maxLength,
longestGc
) %>%
mutate(
DE = FDR < 0.05,
RankStat = -sign(logFC)*log10(P.Value)
)
if (!interactive()) {
topTable_17cqn %>%
dplyr::select(Geneid, Symbol, AveExpr, logFC, P.Value, FDR) %>%
dplyr::slice(1:20) %>%
kable(caption = "The 20 most differentially expressed genes") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
}
| Geneid | Symbol | AveExpr | logFC | P.Value | FDR |
|---|---|---|---|---|---|
| ENSDARG00000080675 | si:dkey-71b5.7 | 3.0884180 | -3.155201 | 0 | 0 |
| ENSDARG00000058371 | krt5 | 4.6721670 | 2.468033 | 0 | 0 |
| ENSDARG00000061585 | cyp4v7 | 2.3275718 | -3.185475 | 0 | 0 |
| ENSDARG00000036830 | krt91 | 3.9359301 | 2.136633 | 0 | 0 |
| ENSDARG00000112702 | BX537263.3 | 7.3995256 | -1.893250 | 0 | 0 |
| ENSDARG00000087290 | si:ch211-202h22.10 | 0.6256422 | 5.183922 | 0 | 0 |
| ENSDARG00000031952 | mb | 2.5176367 | 1.904516 | 0 | 0 |
| ENSDARG00000036773 | s100s | 2.1465686 | 1.814317 | 0 | 0 |
| ENSDARG00000093531 | slc37a4b | 0.4110902 | 2.931494 | 0 | 0 |
| ENSDARG00000053480 | aqp9b | 1.7906320 | 1.767295 | 0 | 0 |
| ENSDARG00000074653 | si:ch211-233m11.1 | 2.6923337 | -1.673957 | 0 | 0 |
| ENSDARG00000091734 | traf3ip2l | 3.0851344 | -1.358915 | 0 | 0 |
| ENSDARG00000069093 | col2a1a | 2.6096957 | 1.377043 | 0 | 0 |
| ENSDARG00000045367 | tuba1b | 6.6405213 | -1.090817 | 0 | 0 |
| ENSDARG00000096564 | CU915827.1 | 1.2862394 | 1.715698 | 0 | 0 |
| ENSDARG00000020581 | otofb | 2.0468960 | 1.435909 | 0 | 0 |
| ENSDARG00000092604 | si:ch211-15j1.4 | 4.7443702 | 1.063000 | 0 | 0 |
| ENSDARG00000103494 | usp43a | 3.3189161 | 1.078400 | 0 | 0 |
| ENSDARG00000086819 | scn1laa | 2.0374308 | 1.278558 | 0 | 0 |
| ENSDARG00000105675 | BX005417.3 | 0.3569543 | 1.878356 | 0 | 0 |
# Check similarity of genes
topTable_17cqn$Geneid[topTable_17cqn$DE == TRUE] %in%
topTable_17$Geneid[topTable_17$DE == TRUE] %>%
table()
## .
## FALSE TRUE
## 28 187
topTable_17cqn %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content",
title = "2017 data with cqn"
) +
scale_color_manual(values = c("grey70", "red"))
Comparison of logFC and GC content after using cqn
topTable_17cqn %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, RankStat)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content",
y = "Ranking Statistic",
title = "2017 data with cqn"
) +
scale_color_manual(values = c("grey70", "red"))
Comparison of ranking statistics and GC content after using cqn
topTable_17cqn %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene length",
title = "2017 data with cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
Comparison of logFC and GC content after using cqn
topTable_17cqn %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, RankStat)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene Length",
y = "Ranking Statistic",
title = "2017 data with cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
Comparison of ranking statistics and GC content after using cqn
# Create named vector of gene level statistics
ranks_17cqn <- topTable_17cqn %>%
dplyr::arrange(RankStat) %>%
with(structure(RankStat, names = Geneid))
# Run GSEA
fgsea_17cqn <- fgsea(hallmark, ranks_17cqn, nperm=1e5) %>%
as_tibble() %>%
dplyr::rename(FDR = padj) %>%
mutate(padj = p.adjust(pval, "bonferroni")) %>%
dplyr::arrange(pval)
if (!interactive()) {
fgsea_17cqn %>%
dplyr::select(-leadingEdge) %>%
dplyr::slice(1:10) %>%
kable(caption = "The 10 most significantly enriched pathways") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
}
| pathway | pval | FDR | ES | NES | nMoreExtreme | size | padj |
|---|---|---|---|---|---|---|---|
| ANGIOGENESIS | 0.0103151 | 0.2741072 | 0.7551096 | 1.927551 | 585 | 33 | 0.5157543 |
| MYOGENESIS | 0.0136548 | 0.2741072 | 0.5052394 | 1.651414 | 929 | 225 | 0.6827392 |
| EPITHELIAL_MESENCHYMAL_TRANSITION | 0.0216541 | 0.2741072 | 0.4901491 | 1.598269 | 1471 | 219 | 1.0000000 |
| XENOBIOTIC_METABOLISM | 0.0322206 | 0.2741072 | -0.4806121 | -1.674239 | 1026 | 223 | 1.0000000 |
| CHOLESTEROL_HOMEOSTASIS | 0.0326067 | 0.2741072 | -0.5536507 | -1.739293 | 1260 | 81 | 1.0000000 |
| MTORC1_SIGNALING | 0.0328929 | 0.2741072 | -0.4754997 | -1.661084 | 1042 | 229 | 1.0000000 |
| COMPLEMENT | 0.0479712 | 0.3426513 | 0.4453016 | 1.447878 | 3249 | 213 | 1.0000000 |
| P53_PATHWAY | 0.0581726 | 0.3635788 | 0.4308126 | 1.403417 | 3947 | 217 | 1.0000000 |
| INFLAMMATORY_RESPONSE | 0.0894240 | 0.4569951 | -0.3958955 | -1.350610 | 3037 | 172 | 1.0000000 |
| OXIDATIVE_PHOSPHORYLATION | 0.0913990 | 0.4569951 | -0.4007950 | -1.395814 | 2916 | 222 | 1.0000000 |
| Geneid | Symbol | AveExpr | logFC | P.Value | FDR |
|---|---|---|---|---|---|
| ENSDARG00000080675 | si:dkey-71b5.7 | 3.0881824 | -3.097472 | 0 | 0 |
| ENSDARG00000058371 | krt5 | 4.6741362 | 2.449728 | 0 | 0 |
| ENSDARG00000112702 | BX537263.3 | 7.3996536 | -2.243755 | 0 | 0 |
| ENSDARG00000061585 | cyp4v7 | 2.3191654 | -3.139064 | 0 | 0 |
| ENSDARG00000087290 | si:ch211-202h22.10 | 0.6296277 | 5.591881 | 0 | 0 |
| ENSDARG00000036830 | krt91 | 3.9384441 | 2.120057 | 0 | 0 |
| ENSDARG00000074653 | si:ch211-233m11.1 | 2.6910751 | -2.277264 | 0 | 0 |
| ENSDARG00000093531 | slc37a4b | 0.4139975 | 3.306116 | 0 | 0 |
| ENSDARG00000031952 | mb | 2.5216783 | 1.889320 | 0 | 0 |
| ENSDARG00000036773 | s100s | 2.1504571 | 1.812137 | 0 | 0 |
| ENSDARG00000053480 | aqp9b | 1.7942745 | 1.755679 | 0 | 0 |
| ENSDARG00000096564 | CU915827.1 | 1.2875096 | 1.737070 | 0 | 0 |
| ENSDARG00000045367 | tuba1b | 6.6404569 | -1.126187 | 0 | 0 |
| ENSDARG00000091734 | traf3ip2l | 3.0840975 | -1.308960 | 0 | 0 |
| ENSDARG00000069093 | col2a1a | 2.6124607 | 1.350510 | 0 | 0 |
| ENSDARG00000020581 | otofb | 2.0502321 | 1.453107 | 0 | 0 |
| ENSDARG00000095578 | si:dkey-222h21.7 | 0.4110680 | 2.109993 | 0 | 0 |
| ENSDARG00000092604 | si:ch211-15j1.4 | 4.7446966 | 1.067221 | 0 | 0 |
| ENSDARG00000101598 | si:ch211-215p11.1 | 2.4986432 | 1.257070 | 0 | 0 |
| ENSDARG00000096273 | si:dkey-3n22.9 | 1.0162751 | 1.601951 | 0 | 0 |
| Geneid | Symbol | AveExpr | logFC | P.Value | FDR |
|---|---|---|---|---|---|
| ENSDARG00000080675 | si:dkey-71b5.7 | 3.0884180 | -3.155201 | 0 | 0 |
| ENSDARG00000058371 | krt5 | 4.6721670 | 2.468033 | 0 | 0 |
| ENSDARG00000061585 | cyp4v7 | 2.3275718 | -3.185475 | 0 | 0 |
| ENSDARG00000036830 | krt91 | 3.9359301 | 2.136633 | 0 | 0 |
| ENSDARG00000112702 | BX537263.3 | 7.3995256 | -1.893250 | 0 | 0 |
| ENSDARG00000087290 | si:ch211-202h22.10 | 0.6256422 | 5.183922 | 0 | 0 |
| ENSDARG00000031952 | mb | 2.5176367 | 1.904516 | 0 | 0 |
| ENSDARG00000036773 | s100s | 2.1465686 | 1.814317 | 0 | 0 |
| ENSDARG00000093531 | slc37a4b | 0.4110902 | 2.931494 | 0 | 0 |
| ENSDARG00000053480 | aqp9b | 1.7906320 | 1.767295 | 0 | 0 |
| ENSDARG00000074653 | si:ch211-233m11.1 | 2.6923337 | -1.673957 | 0 | 0 |
| ENSDARG00000091734 | traf3ip2l | 3.0851344 | -1.358915 | 0 | 0 |
| ENSDARG00000069093 | col2a1a | 2.6096957 | 1.377043 | 0 | 0 |
| ENSDARG00000045367 | tuba1b | 6.6405213 | -1.090817 | 0 | 0 |
| ENSDARG00000096564 | CU915827.1 | 1.2862394 | 1.715698 | 0 | 0 |
| ENSDARG00000020581 | otofb | 2.0468960 | 1.435909 | 0 | 0 |
| ENSDARG00000092604 | si:ch211-15j1.4 | 4.7443702 | 1.063000 | 0 | 0 |
| ENSDARG00000103494 | usp43a | 3.3189161 | 1.078400 | 0 | 0 |
| ENSDARG00000086819 | scn1laa | 2.0374308 | 1.278558 | 0 | 0 |
| ENSDARG00000105675 | BX005417.3 | 0.3569543 | 1.878356 | 0 | 0 |
| pathway | pval | FDR | ES | NES | nMoreExtreme | size | bonferroni |
|---|---|---|---|---|---|---|---|
| ANGIOGENESIS | 0.0130782 | 0.3817766 | 0.7426339 | 1.885557 | 718 | 33 | 0.6539098 |
| MTORC1_SIGNALING | 0.0290776 | 0.3817766 | -0.4814323 | -1.643264 | 1033 | 229 | 1.0000000 |
| CHOLESTEROL_HOMEOSTASIS | 0.0359405 | 0.3817766 | -0.5582494 | -1.717796 | 1497 | 81 | 1.0000000 |
| MYOGENESIS | 0.0377238 | 0.3817766 | 0.4628162 | 1.504056 | 2426 | 225 | 1.0000000 |
| XENOBIOTIC_METABOLISM | 0.0456851 | 0.3817766 | -0.4664074 | -1.588547 | 1629 | 223 | 1.0000000 |
| EPITHELIAL_MESENCHYMAL_TRANSITION | 0.0458132 | 0.3817766 | 0.4525985 | 1.467107 | 2937 | 219 | 1.0000000 |
| COMPLEMENT | 0.0641052 | 0.4578941 | 0.4321599 | 1.396499 | 4095 | 213 | 1.0000000 |
| P53_PATHWAY | 0.0762101 | 0.4763131 | 0.4187876 | 1.356375 | 4883 | 217 | 1.0000000 |
| OXIDATIVE_PHOSPHORYLATION | 0.0909599 | 0.5053330 | -0.4158784 | -1.415911 | 3251 | 222 | 1.0000000 |
| MYC_TARGETS_V1 | 0.1304068 | 0.5644425 | -0.3686211 | -1.254655 | 4666 | 221 | 1.0000000 |
| pathway | pval | FDR | ES | NES | nMoreExtreme | size | padj |
|---|---|---|---|---|---|---|---|
| ANGIOGENESIS | 0.0103151 | 0.2741072 | 0.7551096 | 1.927551 | 585 | 33 | 0.5157543 |
| MYOGENESIS | 0.0136548 | 0.2741072 | 0.5052394 | 1.651414 | 929 | 225 | 0.6827392 |
| EPITHELIAL_MESENCHYMAL_TRANSITION | 0.0216541 | 0.2741072 | 0.4901491 | 1.598269 | 1471 | 219 | 1.0000000 |
| XENOBIOTIC_METABOLISM | 0.0322206 | 0.2741072 | -0.4806121 | -1.674239 | 1026 | 223 | 1.0000000 |
| CHOLESTEROL_HOMEOSTASIS | 0.0326067 | 0.2741072 | -0.5536507 | -1.739293 | 1260 | 81 | 1.0000000 |
| MTORC1_SIGNALING | 0.0328929 | 0.2741072 | -0.4754997 | -1.661084 | 1042 | 229 | 1.0000000 |
| COMPLEMENT | 0.0479712 | 0.3426513 | 0.4453016 | 1.447878 | 3249 | 213 | 1.0000000 |
| P53_PATHWAY | 0.0581726 | 0.3635788 | 0.4308126 | 1.403417 | 3947 | 217 | 1.0000000 |
| INFLAMMATORY_RESPONSE | 0.0894240 | 0.4569951 | -0.3958955 | -1.350610 | 3037 | 172 | 1.0000000 |
| OXIDATIVE_PHOSPHORYLATION | 0.0913990 | 0.4569951 | -0.4007950 | -1.395814 | 2916 | 222 | 1.0000000 |